WIN_data_ingest

Data ingestion and initial analysis from FL WIN water quality database.

read in the WIN txt pipe-delimited file
# Read all lines from the file
all_lines <- readLines("data/tina_WIN_WAVES_UDOUJ_TH_1_20250304141103_89325.txt")

# Locate the header line (assumes it starts with "Organization ID")
header_index <- grep('^"Organization ID"', all_lines)[1]

# Print the skipped lines (everything before the header)
cat("Skipped lines:\n")
Skipped lines:
read in the WIN txt pipe-delimited file
cat(all_lines[1:(header_index - 1)], sep = "\n")
Requested report - WIN WAVES
PROJECT ID - PROJECT NAME = PROJ-001 - Surfacewater Quality Monitoring Network | WQMP - Water Quality Monitoring Program | JTDIEOFF - Julia Tuttle Die-Off Monitoring | FKNMSFIELD - FKNMS-DEP Field Sampling | WQMP - Water Quality Monitoring Program | EPA - Water Quality Monitoring Program | SJCUD RIVER SAMPLING - Water Quality Monitoring Program | BBWQ - Biscayne Bay Water Quality | WQMP - Water Quality Monitoring Program | BBWQ - Biscayne Bay Water Quality Monitoring Program | WQMP - Water Quality Monitoring Program | ERMWQ - PBC ERM Ambient Water Quality Monitoring | WQMP - Tampa Bay Water Alafia River Watershed Water Quality Monitoring
PROJECT INTENDED USE = Routine Monitoring | IWR Assessment/TMDL Listing | Modeling | 319 Program | BMAP | Trend Analysis | Event Response
COUNTY = BROWARD, FLORIDA | MARTIN, FLORIDA | MIAMI-DADE, FLORIDA | MONROE, FLORIDA | PALM BEACH, FLORIDA
DEP ANALYTE GROUP = General Physical-Chemical | Field Observation | Nutrients
DEP ANALYTE NAME - UNIT = Ammonia (N) - mg/kg | Ammonia (N) - mg/L | Nitrogen- Total Kjeldahl - mg/kg | Nitrogen- Total Kjeldahl - mg/L | Nitrate (N) - mg/L | Nitrate-Nitrite (N) - mg/kg | Nitrate-Nitrite (N) - mg/L | Nitrite (N) - mg/kg | Nitrite (N) - mg/L | Orthophosphate (P) - mg/L | pH - SU | Phosphorus- Total - mg/kg | Phosphorus- Total - mg/L | Salinity - PSU | Carbon- Organic - mg/kg | Carbon- Organic - mg/L | Nitrogen- Total - mg/L | Silica (SiO2) - mg/L | Dissolved Oxygen - mg/L | Temperature, Water - deg C | Carbon- Total - mg/kg | Silicate - mg/L
Report Run on MARCH 4, 2025
The data you are accessing are from the WIN Warehouse.  The WIN Warehouse is refreshed on a weekly basis with new data that are submitted to WIN.  The refresh process occurs on the weekend; data uploaded to WIN through each Friday are available in the WIN Warehouse by the following Monday.  WIN replaces Florida STORET as an active data repository.  Florida STORET data are accessible through STORET Public Access (SPA).
The following data met your selection criteria
read in the WIN txt pipe-delimited file
cat("\n\n")
read in the WIN txt pipe-delimited file
# Extract the header line
header_line <- all_lines[header_index]

# Determine the expected number of columns based on the header line
expected_cols <- length(strsplit(header_line, "\\|")[[1]])

# Extract all remaining lines (which may contain multi-line records)
raw_data_lines <- all_lines[(header_index + 1):length(all_lines)]

# Reassemble rows by combining lines until the number of delimiters (pipes) matches expectation.
combined_rows <- character(0)
temp_row <- ""

for (line in raw_data_lines) {
  # Start a new temporary row or append to the existing one
  temp_row <- if (temp_row == "") line else paste(temp_row, line, sep = "\n")
  
  # Count the number of pipe delimiters in temp_row
  n_delim <- length(gregexpr("\\|", temp_row)[[1]])
  
  # If the row has the expected number of delimiters (one less than columns), it's complete.
  if (n_delim == (expected_cols - 1)) {
    combined_rows <- c(combined_rows, temp_row)
    temp_row <- ""  # Reset for the next record
  }
}

# In case any data remains in temp_row, add it as a record
if (temp_row != "") {
  combined_rows <- c(combined_rows, temp_row)
}

# Reassemble the complete text with header and data rows
full_text <- paste(c(header_line, combined_rows), collapse = "\n")

# Read the data from the reassembled text
df <- read.table(text = full_text,
                 sep = "|",
                 header = TRUE,
                 quote = "\"",
                 fill = TRUE,
                 stringsAsFactors = FALSE)
Print the first few rows of the dataframe
# head(df)

library(pander)

pander(head(df), digits = 6)
Table continues below
Organization.ID Organization.Name Monitoring.Location.ID
21FLKNMS FLORIDA KEYS NATIONAL MARINE SANCTUARY G5AP0022
21FLKNMS FLORIDA KEYS NATIONAL MARINE SANCTUARY G5AP0008
21FLKNMS FLORIDA KEYS NATIONAL MARINE SANCTUARY G5AP0009
21FLKNMS FLORIDA KEYS NATIONAL MARINE SANCTUARY G5WA0004
21FLKNMS FLORIDA KEYS NATIONAL MARINE SANCTUARY MON-STOCKIS-2
21FLKNMS FLORIDA KEYS NATIONAL MARINE SANCTUARY G5WA0003
Table continues below
Monitoring.Location.Name Primary.Type Secondary.Type DEP.Latitude
DRTO - Loggerhead Forest Surface Water Estuary 24.6624
Pennekamp (Ocean Forest) Surface Water Estuary 25.2237
Pennekamp (Garden Cove)) Surface Water Estuary 25.1709
Lignumvitae Key (Indian Key Fill - Oceanside) Surface Water Estuary 24.893
Monroe-Stock Island-2 Surface Water Estuary 24.5676
Lignumvitae Key (Indian Key Fill - Bayside) Surface Water Estuary 24.8892
Table continues below
DEP.Longitude FHD.Reach.Code STORET.Monitoring.Location.ID County
-82.9274 NA MONROE
-80.3274 NA MONROE
-80.3671 NA MONROE
-80.669 NA MONROE
-81.7355 NA MONROE
-80.6776 NA MONROE
Table continues below
WBID Project.ID Activity.ID Activity.Start.Date.Time
8072 FKNMSFIELD 46142G5P00222 02/26/2022 15:24:00
6006B FKNMSFIELD 42509G5P0008A 02/24/2021 09:49:00
6006B FKNMSFIELD 43481G5P0009A 05/27/2021 09:09:00
8084 FKNMSFIELD 82820G5A00004 08/28/2020 13:50:00
6014B FKNMSFIELD 47034MO-STO-2 05/24/2022 10:24:00
8077H FKNMSFIELD 43186G5A00003 04/29/2021 11:40:00
Table continues below
Sampling.Agency.Name Activity.Depth Activity.Depth.Unit
FDEP FLORIDA KEYS NATIONAL MARINE SANCTUARY 1 m
FDEP FLORIDA KEYS NATIONAL MARINE SANCTUARY 0.5 m
FDEP FLORIDA KEYS NATIONAL MARINE SANCTUARY 0.5 m
FDEP FLORIDA KEYS NATIONAL MARINE SANCTUARY 0.5 m
FDEP FLORIDA KEYS NATIONAL MARINE SANCTUARY 0.5 m
FDEP FLORIDA KEYS NATIONAL MARINE SANCTUARY 0.5 m
Table continues below
DEP.Analyte.Name DEP.Result.Value.Number DEP.Result.Value.Text
Temperature, Water 24.67
Dissolved Oxygen 5.09
Dissolved Oxygen 1.55
Temperature, Water 32.7
pH 7.79
Dissolved Oxygen 6.79
DEP.Result.Unit DEP.MDL DEP.PQL Value.Qualifier
deg C NA NA
mg/L NA NA
mg/L NA NA
deg C NA NA
SU NA NA
mg/L NA NA

Organization Contributions

print table
library(dplyr)
library(tidyr)

# Ensure the result value column is numeric (coerce if needed)
df$DEP.Result.Value.Number <- as.numeric(df$DEP.Result.Value.Number)

# Create a table that, for each combination of Organization.ID and Monitoring.Location.ID,
# shows 1 if any non-NA DEP.Result.Value.Number exists for that DEP.Analyte.Name, 0 otherwise.
presence_table <- df %>%
  group_by(Organization.ID, Monitoring.Location.ID, DEP.Analyte.Name) %>%
  summarize(presence = ifelse(any(!is.na(DEP.Result.Value.Number)), 1, 0),
            .groups = "drop") %>%
  pivot_wider(names_from = DEP.Analyte.Name,
              values_from = presence,
              values_fill = list(presence = 0))

# Print the resulting table
# print(presence_table)

library(DT)

# Determine which columns correspond to analytes (i.e. presence/absence columns)
analyte_cols <- setdiff(names(presence_table), c("Organization.ID", "Monitoring.Location.ID"))

# Create an interactive datatable with pagination disabled (show all rows)
datatable(presence_table, options = list(paging = FALSE)) %>%
  formatStyle(
    columns = analyte_cols,
    backgroundColor = styleEqual(c(0, 1), c("lightcoral", "lightgreen"))
  )
create map
library(leaflet)
library(dplyr)

# Filter out rows with missing coordinate data
df_map <- df %>% filter(!is.na(DEP.Latitude) & !is.na(DEP.Longitude))

# Create a zoomable, interactive map with circle markers
leaflet(df_map) %>%
  addProviderTiles(providers$OpenStreetMap) %>%  # Use OpenStreetMap tiles
  addCircleMarkers(
    lng = ~DEP.Longitude, 
    lat = ~DEP.Latitude, 
    radius = 4,
    color = "blue",
    fillOpacity = 0.5,
    popup = ~paste("Organization:", Organization.ID, "<br>",
                   "Monitoring Location:", Monitoring.Location.ID)
  )

If you have more visualization ideas for this data, please open a github issue here.